{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "70261773",
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "##Title: Saginaw County ROA Webscraper and Parser\n",
    "##Author: Arkey Barnett\n",
    "##Date Last Modified: 01/09/2024\n",
    "##Purpose: Scrape and parse Saginaw County ROAs (court history records) to obtain the court history events needed for the scheduling instrument.\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33c4c430",
   "metadata": {},
   "outputs": [],
   "source": [
    "################################################################################################################\n",
    "##Importing the Packages\n",
    "################################################################################################################\n",
    "import requests;\n",
    "from lxml import etree;\n",
    "import pandas as pd;\n",
    "import numpy as np;\n",
    "from lxml import html;\n",
    "from lxml.html import fromstring\n",
    "import string;\n",
    "#import requests_file;\n",
    "import time;\n",
    "import glob, os;\n",
    "import re;\n",
    "from datetime import datetime;\n",
    "import math;\n",
    "##Ran the following into my terminal\n",
    "#python -m pip install selenium\n",
    "#python -m pip install webdriver-manager\n",
    "## Location of modules: ./opt/anaconda3/lib/python3.9/site-packages\n",
    "##pip install selenium\n",
    "##pip install webdriver-manager\n",
    "import time; ##allows python to rest between moves…\n",
    "from selenium import webdriver; ##actual webdriver we are going to use\n",
    "from selenium.webdriver.chrome.service import Service; ##best practice way to do it right now\n",
    "from selenium.webdriver.chrome.options import Options;\n",
    "from selenium.webdriver.support.ui import Select; #https://stackoverflow.com/questions/32382415/selenium-python-select\n",
    "from webdriver_manager.chrome import ChromeDriverManager;\n",
    "from selenium.webdriver.common.by import By;\n",
    "from selenium.webdriver.common.keys import Keys;\n",
    "from selenium.webdriver import ActionChains;\n",
    "from selenium.webdriver.support.wait import WebDriverWait; ##https://selenium-python.readthedocs.io/waits.html\n",
    "from selenium.webdriver.support import expected_conditions as EC;\n",
    "from selenium.common.exceptions import WebDriverException #https://stackoverflow.com/questions/26943847/check-whether-element-is-clickable-in-selenium\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4261f811",
   "metadata": {},
   "outputs": [],
   "source": [
    "################################################################################################################\n",
    "##Setting up the Directories\n",
    "################################################################################################################\n",
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "# Folder that contains the project's Dropbox directory. It defaults to the\n",
    "# original machine's home folder; set the IGNITE_HOME environment variable to\n",
    "# run the notebook on another machine without editing this cell.\n",
    "HOME_DIR = os.environ.get('IGNITE_HOME', '/Users/arkeybarnett')\n",
    "os.chdir(HOME_DIR)\n",
    "\n",
    "# ROOT, source_pre, source_post and Case_numbers are relative to the working\n",
    "# directory set above; OUT and Monitor are resolved to absolute paths.\n",
    "ROOT = Path('Dropbox/IGNITE/2input_data/saginaw_courts')\n",
    "OUT  = (ROOT / '3ROAs/pre_covid_crime').resolve()\n",
    "Monitor = (ROOT / '5output').resolve()\n",
    "case_info = Monitor / \"precovcrime_cases11.csv\"\n",
    "source_pre = ROOT / '3ROAs/pre_covid_crime'\n",
    "source_post = ROOT / '3ROAs'\n",
    "Case_numbers = ROOT / '2case_number'\n",
    "\n",
    "##inputting case numbers\n",
    "##Note the new txt files,\n",
    "def read_case_numbers(txt_path):\n",
    "    \"\"\"Return every line of txt_path with surrounding whitespace stripped.\n",
    "\n",
    "    Blank lines are kept (as empty strings) so the position of each entry in\n",
    "    the returned list matches its line in the file.\n",
    "    \"\"\"\n",
    "    with open(txt_path, 'r') as txt:\n",
    "        return [line.strip() for line in txt]\n",
    "\n",
    "active_pre = read_case_numbers(Case_numbers / 'sag_case_precrim_repull.txt')\n",
    "print(active_pre)\n",
    "print(len(active_pre))  # a bare len(...) in the middle of a cell displays nothing\n",
    "\n",
    "active_post = read_case_numbers(Case_numbers / 'sag_case_postcrim_repull.txt')\n",
    "print(active_post)\n",
    "print(len(active_post))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7674b70d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2d2fc81",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a379d4e2",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a20fe7b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0eb5543",
   "metadata": {},
   "outputs": [],
   "source": [
    "################################################################################################################\n",
    "##OFFICIAL\n",
    "##Parsing the ROAs: The unpulled ROAs PreCrime\n",
    "#ex)\n",
    "#13001087SM\n",
    "#19004516OM\n",
    "#19007466OM\n",
    "#19002475OM using post\n",
    "################################################################################################################\n",
    "################Creating Program to Parse the ROAs\n",
    "def sag_roa_parser_pre(file):\n",
    "    \"\"\"Parse one saved ROA page from ``source_pre`` into a one-row DataFrame.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file : str\n",
    "        Name of the saved page inside ``source_pre``; the driver code below\n",
    "        passes the case number followed by '.html'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row with the case header, party, charge, bond, disposition and\n",
    "        event fields found on the page. Repeated items get numbered columns\n",
    "        (c_...1, c_...2, bs_..., b_..., d_..., e_...). When the page cannot be\n",
    "        read or a required field is missing, the file name and the error are\n",
    "        printed and the row keeps only the fields gathered before the failure\n",
    "        (possibly none).\n",
    "    \"\"\"\n",
    "    records = {}\n",
    "\n",
    "    def first_text(nodes):\n",
    "        # Optional fields: first text node (trimmed), or \"\" when nothing matched.\n",
    "        return nodes[0].strip('\\n ') if nodes else \"\"\n",
    "\n",
    "    try:\n",
    "        with open(source_pre/file, \"r\") as f:\n",
    "            page = f.read()\n",
    "        tree = etree.HTML(page)\n",
    "\n",
    "        #case level info\n",
    "        # Cut only a literal '.html' suffix: str.strip('.html') would remove any\n",
    "        # of the characters . h t m l from both ends of the name.\n",
    "        records[\"case_id\"] = file[:-len('.html')] if file.endswith('.html') else file\n",
    "        # Header values are taken by position among the text nodes of the\n",
    "        # \"col-md-4\" paragraphs; a page with fewer nodes raises IndexError and is\n",
    "        # reported by the handler at the bottom.\n",
    "        # //text() is used so text after inline tags is kept, see\n",
    "        # https://stackoverflow.com/questions/29555452/lxml-doesnt-get-all-text-in-element-if-text-has-br\n",
    "        header = tree.xpath('//div[@class=\"col-md-4\"]/p//text()')\n",
    "        case = header[2].strip('\\n ')\n",
    "        records[\"roa_case_id\"] = case\n",
    "        #Court Information\n",
    "        records[\"court\"] = header[5].strip('\\n ')\n",
    "        records[\"date_filed\"] = header[8].strip('\\n ')\n",
    "        records[\"case_type\"] = header[11].strip('\\n ')\n",
    "        records[\"case_status\"] = header[14].strip('\\n ')\n",
    "\n",
    "        # Entitlement: remove the case number only as a whole prefix/suffix.\n",
    "        # str.strip(case) treats the case number as a set of characters and would\n",
    "        # also eat trailing letters or digits of the title that occur in it.\n",
    "        title = tree.xpath('//div[@class=\"col-md-12\"]/p//text()')[0].strip('\\n ')\n",
    "        if case and title.startswith(case):\n",
    "            title = title[len(case):]\n",
    "        if case and title.endswith(case):\n",
    "            title = title[:-len(case)]\n",
    "        records[\"entitlement\"] = title.strip(\"| \")\n",
    "\n",
    "        #Party\n",
    "        party = '//div[@id=\"divPartyInformation_body\"]'\n",
    "        toggle = party + '//div[@class=\"col-md-4\"]/div[@class=\"tyler-toggle-container\"]'\n",
    "        records[\"defendent\"] = tree.xpath(party + '//div[@class=\"col-md-8\"]/p//text()')[3].strip('\\n ')\n",
    "        # Attorney fields are optional: blank when the page has no such node.\n",
    "        records[\"attorney_name\"] = first_text(tree.xpath(toggle + '/div/div/div/div//text()'))\n",
    "        # The attorney type is looked up in a <p>-free way second: first under\n",
    "        # div/p, then under the \"tyler-bold\" div.\n",
    "        records[\"attorney_type\"] = first_text(\n",
    "            tree.xpath(toggle + '/div/p//text()')\n",
    "            or tree.xpath(toggle + '/div[@class=\"tyler-bold\"]//text()')\n",
    "        )\n",
    "\n",
    "        #Charges #relevant for criminal cases; one numbered column set per table row\n",
    "        # tr[not(@style)]: https://stackoverflow.com/questions/2404130/xpath-to-find-elements-that-does-not-have-an-id-or-class\n",
    "        charge_rows = tree.xpath('//div[@id=\"chargeInformationDiv\"]/div/div/div/table/tbody/tr[not(@style)]')\n",
    "        for c, charge in enumerate(charge_rows, start=1):\n",
    "            records[\"c_charge_num{0}\".format(c)] = charge.xpath('./td[2]//text()')\n",
    "            records[\"c_current_charge{0}\".format(c)] = charge.xpath('./td[3]//text()')\n",
    "            records[\"c_charge_stat{0}\".format(c)] = charge.xpath('./td[4]//text()')\n",
    "            records[\"c_charge_level{0}\".format(c)] = charge.xpath('./td[5]//text()')\n",
    "            records[\"c_charge_date{0}\".format(c)] = charge.xpath('./td[6]//text()')\n",
    "\n",
    "        #Bond Settings: every text node of the grid body becomes its own column\n",
    "        bond_setting_text = tree.xpath('//div[@id=\"BondSettingsGrid\"]/table/tbody//text()')\n",
    "        for bs, text in enumerate(bond_setting_text, start=1):\n",
    "            records[\"bs_bond_setdate{0}\".format(bs)] = text\n",
    "\n",
    "        #Bond Details\n",
    "        # NOTE(review): tr[1] keeps only the first row of each tbody - confirm that\n",
    "        # later bond rows are meant to be skipped.\n",
    "        bond_rows = tree.xpath('//div[@id=\"BondsGrid\"]/table[@cellspacing=\"0\"]/tbody/tr[1]')\n",
    "        for b, bond in enumerate(bond_rows, start=1):\n",
    "            records[\"b_bond_type{0}\".format(b)] = bond.xpath('./td[1]//text()')\n",
    "            records[\"b_bond_num{0}\".format(b)] = bond.xpath('./td[2]//text()')\n",
    "            records[\"b_bond_amnt{0}\".format(b)] = bond.xpath('./td[3]//text()')\n",
    "            records[\"b_bond_stat{0}\".format(b)] = bond.xpath('./td[4]//text()')\n",
    "\n",
    "        #Disposition Events\n",
    "        # Each value is a one-item list holding the repr of the text-node list\n",
    "        # (output format kept unchanged).\n",
    "        disp_events = tree.xpath('//div[@id=\"dispositionInformationDiv\"]/div[@class=\"row-buff\"]/div[@class=\"row-buff\"]')\n",
    "        for d, disp in enumerate(disp_events, start=1):\n",
    "            records[\"d_disp_time_event{0}\".format(d)] = [str(disp.xpath('./div[@class=\"tyler-toggle-controller open\"]/p[@class=\"text-primary\"][1]//text()'))]\n",
    "            records[\"d_disp_charge_action{0}\".format(d)] = [str(disp.xpath('./div[@class=\"tyler-toggle-container row-buff\"]//text()'))]\n",
    "\n",
    "        #events and hearings (same one-item-list value format as the dispositions)\n",
    "        event_items = tree.xpath('//div[@id=\"eventsInformationDiv\"]/ul[@class=\"list-group\"]/li[@class=\"list-group-item\"]/div[@class=\"portal-case-event\"]')\n",
    "        for e, eve in enumerate(event_items, start=1):\n",
    "            records[\"e_event_date{0}\".format(e)] = [str(eve.xpath('./div/p[@class=\"text-primary\"]//text()'))]\n",
    "            records[\"e_event_comment{0}\".format(e)] = [str(eve.xpath('./div[@class=\"tyler-toggle-container row-buff\"]//text()'))]\n",
    "    except Exception as err:\n",
    "        # Best effort: report the page and keep whatever was collected so far.\n",
    "        # Exception (not a bare except) lets Ctrl-C / kernel interrupt through.\n",
    "        print(file, repr(err))\n",
    "    return pd.DataFrame([records], columns=records.keys())\n",
    "\n",
    "################Performing Program to Parse the ROAs\n",
    "repull = active_pre\n",
    "\n",
    "##names (case numbers) of the saved ROAs to parse\n",
    "files = list(repull)\n",
    "print(files)\n",
    "\n",
    "##The first entry is noted as empty, so it is passed as-is (no '.html' appended);\n",
    "##every later entry is a case number whose saved page is named CASE.html.\n",
    "##Frames are collected and concatenated once: pd.concat inside the loop copies\n",
    "##all previously parsed rows again on every pass.\n",
    "frames = [sag_roa_parser_pre(name) for name in files[:1]]\n",
    "frames += [sag_roa_parser_pre(name + '.html') for name in files[1:]]\n",
    "data = pd.concat(frames) if frames else pd.DataFrame()  ##an empty case list no longer raises\n",
    "\n",
    "################Saving dataframe into excel spreadsheets for analysis in STATA\n",
    "data.to_excel(Monitor/'roasparsed_precrim_repull.xlsx', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05e1c83f",
   "metadata": {},
   "outputs": [],
   "source": [
    "################################################################################################################\n",
    "##Parsing the ROAs: The unpulled ROAs PostCrime\n",
    "#ex)\n",
    "#13001087SM\n",
    "#19004516OM\n",
    "#19007466OM\n",
    "#19002475OM using post\n",
    "################################################################################################################\n",
    "################Creating Program to Parse the ROAs\n",
    "def sag_roa_parser_post(file):\n",
    "    \"\"\"Parse one saved ROA page from ``source_post`` into a one-row DataFrame.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file : str\n",
    "        Name of the saved page inside ``source_post``; the driver code below\n",
    "        passes the case number followed by '.html'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row with the case header, party, charge, bond, disposition and\n",
    "        event fields found on the page. Repeated items get numbered columns\n",
    "        (c_...1, c_...2, bs_..., b_..., d_..., e_...). When the page cannot be\n",
    "        read or a required field is missing, the file name and the error are\n",
    "        printed and the row keeps only the fields gathered before the failure\n",
    "        (possibly none).\n",
    "    \"\"\"\n",
    "    records = {}\n",
    "\n",
    "    def first_text(nodes):\n",
    "        # Optional fields: first text node (trimmed), or \"\" when nothing matched.\n",
    "        return nodes[0].strip('\\n ') if nodes else \"\"\n",
    "\n",
    "    try:\n",
    "        with open(source_post/file, \"r\") as f:\n",
    "            page = f.read()\n",
    "        tree = etree.HTML(page)\n",
    "\n",
    "        #case level info\n",
    "        # Cut only a literal '.html' suffix: str.strip('.html') would remove any\n",
    "        # of the characters . h t m l from both ends of the name.\n",
    "        records[\"case_id\"] = file[:-len('.html')] if file.endswith('.html') else file\n",
    "        # Header values are taken by position among the text nodes of the\n",
    "        # \"col-md-4\" paragraphs; a page with fewer nodes raises IndexError and is\n",
    "        # reported by the handler at the bottom.\n",
    "        # //text() is used so text after inline tags is kept, see\n",
    "        # https://stackoverflow.com/questions/29555452/lxml-doesnt-get-all-text-in-element-if-text-has-br\n",
    "        header = tree.xpath('//div[@class=\"col-md-4\"]/p//text()')\n",
    "        case = header[2].strip('\\n ')\n",
    "        records[\"roa_case_id\"] = case\n",
    "        #Court Information\n",
    "        records[\"court\"] = header[5].strip('\\n ')\n",
    "        records[\"date_filed\"] = header[8].strip('\\n ')\n",
    "        records[\"case_type\"] = header[11].strip('\\n ')\n",
    "        records[\"case_status\"] = header[14].strip('\\n ')\n",
    "\n",
    "        # Entitlement: remove the case number only as a whole prefix/suffix.\n",
    "        # str.strip(case) treats the case number as a set of characters and would\n",
    "        # also eat trailing letters or digits of the title that occur in it.\n",
    "        title = tree.xpath('//div[@class=\"col-md-12\"]/p//text()')[0].strip('\\n ')\n",
    "        if case and title.startswith(case):\n",
    "            title = title[len(case):]\n",
    "        if case and title.endswith(case):\n",
    "            title = title[:-len(case)]\n",
    "        records[\"entitlement\"] = title.strip(\"| \")\n",
    "\n",
    "        #Party\n",
    "        party = '//div[@id=\"divPartyInformation_body\"]'\n",
    "        toggle = party + '//div[@class=\"col-md-4\"]/div[@class=\"tyler-toggle-container\"]'\n",
    "        records[\"defendent\"] = tree.xpath(party + '//div[@class=\"col-md-8\"]/p//text()')[3].strip('\\n ')\n",
    "        # Attorney fields are optional: blank when the page has no such node.\n",
    "        records[\"attorney_name\"] = first_text(tree.xpath(toggle + '/div/div/div/div//text()'))\n",
    "        # The attorney type is looked up first under div/p, then under the\n",
    "        # \"tyler-bold\" div.\n",
    "        records[\"attorney_type\"] = first_text(\n",
    "            tree.xpath(toggle + '/div/p//text()')\n",
    "            or tree.xpath(toggle + '/div[@class=\"tyler-bold\"]//text()')\n",
    "        )\n",
    "\n",
    "        #Charges #relevant for criminal cases; one numbered column set per table row\n",
    "        # tr[not(@style)]: https://stackoverflow.com/questions/2404130/xpath-to-find-elements-that-does-not-have-an-id-or-class\n",
    "        charge_rows = tree.xpath('//div[@id=\"chargeInformationDiv\"]/div/div/div/table/tbody/tr[not(@style)]')\n",
    "        for c, charge in enumerate(charge_rows, start=1):\n",
    "            records[\"c_charge_num{0}\".format(c)] = charge.xpath('./td[2]//text()')\n",
    "            records[\"c_current_charge{0}\".format(c)] = charge.xpath('./td[3]//text()')\n",
    "            records[\"c_charge_stat{0}\".format(c)] = charge.xpath('./td[4]//text()')\n",
    "            records[\"c_charge_level{0}\".format(c)] = charge.xpath('./td[5]//text()')\n",
    "            records[\"c_charge_date{0}\".format(c)] = charge.xpath('./td[6]//text()')\n",
    "\n",
    "        #Bond Settings: every text node of the grid body becomes its own column\n",
    "        bond_setting_text = tree.xpath('//div[@id=\"BondSettingsGrid\"]/table/tbody//text()')\n",
    "        for bs, text in enumerate(bond_setting_text, start=1):\n",
    "            records[\"bs_bond_setdate{0}\".format(bs)] = text\n",
    "\n",
    "        #Bond Details\n",
    "        # NOTE(review): tr[1] keeps only the first row of each tbody - confirm that\n",
    "        # later bond rows are meant to be skipped.\n",
    "        bond_rows = tree.xpath('//div[@id=\"BondsGrid\"]/table[@cellspacing=\"0\"]/tbody/tr[1]')\n",
    "        for b, bond in enumerate(bond_rows, start=1):\n",
    "            records[\"b_bond_type{0}\".format(b)] = bond.xpath('./td[1]//text()')\n",
    "            records[\"b_bond_num{0}\".format(b)] = bond.xpath('./td[2]//text()')\n",
    "            records[\"b_bond_amnt{0}\".format(b)] = bond.xpath('./td[3]//text()')\n",
    "            records[\"b_bond_stat{0}\".format(b)] = bond.xpath('./td[4]//text()')\n",
    "\n",
    "        #Disposition Events\n",
    "        # Each value is a one-item list holding the repr of the text-node list\n",
    "        # (output format kept unchanged).\n",
    "        disp_events = tree.xpath('//div[@id=\"dispositionInformationDiv\"]/div[@class=\"row-buff\"]/div[@class=\"row-buff\"]')\n",
    "        for d, disp in enumerate(disp_events, start=1):\n",
    "            records[\"d_disp_time_event{0}\".format(d)] = [str(disp.xpath('./div[@class=\"tyler-toggle-controller open\"]/p[@class=\"text-primary\"][1]//text()'))]\n",
    "            records[\"d_disp_charge_action{0}\".format(d)] = [str(disp.xpath('./div[@class=\"tyler-toggle-container row-buff\"]//text()'))]\n",
    "\n",
    "        #events and hearings (same one-item-list value format as the dispositions)\n",
    "        event_items = tree.xpath('//div[@id=\"eventsInformationDiv\"]/ul[@class=\"list-group\"]/li[@class=\"list-group-item\"]/div[@class=\"portal-case-event\"]')\n",
    "        for e, eve in enumerate(event_items, start=1):\n",
    "            records[\"e_event_date{0}\".format(e)] = [str(eve.xpath('./div/p[@class=\"text-primary\"]//text()'))]\n",
    "            records[\"e_event_comment{0}\".format(e)] = [str(eve.xpath('./div[@class=\"tyler-toggle-container row-buff\"]//text()'))]\n",
    "    except Exception as err:\n",
    "        # Best effort: report the page and keep whatever was collected so far.\n",
    "        # Exception (not a bare except) lets Ctrl-C / kernel interrupt through.\n",
    "        print(file, repr(err))\n",
    "    return pd.DataFrame([records], columns=records.keys())\n",
    "\n",
    "################Performing Program to Parse the ROAs\n",
    "repull = active_post\n",
    "\n",
    "##names (case numbers) of the saved ROAs to parse\n",
    "files = list(repull)\n",
    "print(files)\n",
    "\n",
    "##The first entry is noted as empty, so it is passed as-is (no '.html' appended);\n",
    "##every later entry is a case number whose saved page is named CASE.html.\n",
    "##Frames are collected and concatenated once: pd.concat inside the loop copies\n",
    "##all previously parsed rows again on every pass.\n",
    "frames = [sag_roa_parser_post(name) for name in files[:1]]\n",
    "frames += [sag_roa_parser_post(name + '.html') for name in files[1:]]\n",
    "data = pd.concat(frames) if frames else pd.DataFrame()  ##an empty case list no longer raises\n",
    "\n",
    "################Saving dataframe into excel spreadsheets for analysis in STATA\n",
    "data.to_excel(Monitor/'roasparsed_postcrim_repull.xlsx', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd35ac2e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
